/* Session setup: 85-column listing output, left-aligned (no centering). */
options ls=85 nocenter;
/* SAVE: permanent library holding the input datasets read below
   (save.data11b and save.irates3). */
libname save '/bbkinghome/sschaner/Angrist Work/Web Papers/Angrist_2002/immig/1900/sasdata';
/* MOULTON3: fileref for the external program pulled in by the include
   statements inside the bigmoulton macro. */
filename moulton3 '/bbkinghome/sschaner/Angrist Work/Web Papers/AcemogluAngrist_2000/SAS programs/moulton3.sas';


/* table5w: first-stage for women (includes GQ obs, OCCSCORE=0 for institution)

   6-19-00 from firstwom2

   6-18-00 look at models with older*code

   female first stage and RF's: uses data94b

   revised 6-14-00 to use corrected income score imputation

   from firstwom1
   6-4-00 RETYPED

   1910,1920,1940 focus

   firstwom1 reads save.rd1900m6 or save.rd1900m8

   note: rd1900m6 uses mother, father-or-both CODES
         rd1900m8 uses father, mother-or-both CODES
         (use m6 for 40/60 follow-ups)
         data94b uses father, mother-or-both, codes mtongue hebrew

   note: 6-14-00 version of data94b omits hebrew

*/


* HERE ARE THE BITS OF CODE THAT DIFFER FOR MEN AND WOMEN;
/* women1: DATA-step fragment used by makedata when building fb1 for the
   women run.  Female records have 2 added to age, the shifted age must lie
   in 20-50, and OLDER flags shifted age of 36 or more. */
%macro women1;
	if female eq 1 then age = age + 2;
	if 20 le age le 50;
	older = (age ge 36);
%mend women1;
/* men1: DATA-step fragment used by makedata when building fb1 for the
   men run.  Male records (female=0) have 2 subtracted from age, the shifted
   age must lie in 18-48, and OLDER flags shifted age of 34 or more. */
%macro men1;
	if female eq 0 then age = age - 2;
	if 18 le age le 48;
	older = (age ge 34);
%mend men1;
	
/* women2: sample restriction for the micro data step (dataset TWO) in the
   women run: years 91-94, nativity 2-4, female records only, ages 18-48.
   OLDER flags age 34 or more. */
%macro women2;
	if 91 <= year <= 94;
	if 2 <= nativity <= 4;
	if female = 1;
	if 18 <= age <= 48;
	older = (age >= 34);
%mend women2;
/* men2: sample restriction for the micro data step (dataset TWO) in the
   men run: years 91-94, nativity 2-4, ages 20-50.  OLDER flags age 36 or
   more.  No sex filter is applied here; makedata filters on FEMALE later,
   when it builds the final gender dataset. */
%macro men2;
	if 91 <= year <= 94;
	if 2 <= nativity <= 4;
	if 20 <= age <= 50;
	older = (age >= 36);
%mend men2;
	
/* women3: builds single-year age dummies age19-age48 (element j is 1 when
   age equals 18+j, so age 18 has no dummy) and their interactions with the
   year=92 and year=94 indicators (i2age*, i4age*). */
%macro women3;
	array ages{30}   age19-age48;
	array i2ages{30} i2age19-i2age48;
	array i4ages{30} i4age19-i4age48;
	do j = 1 to dim(ages);
		ages{j}   = (age eq (j + 18));
		i2ages{j} = (year eq 92) * ages{j};
		i4ages{j} = (year eq 94) * ages{j};
	end;
%mend women3;
/* men3: builds single-year age dummies age21-age50 (element j is 1 when
   age equals 20+j, so age 20 has no dummy) and their interactions with the
   year=92 and year=94 indicators (i2age*, i4age*). */
%macro men3;
	array ages{30}   age21-age50;
	array i2ages{30} i2age21-i2age50;
	array i4ages{30} i4age21-i4age50;
	do j = 1 to dim(ages);
		ages{j}   = (age eq (j + 20));
		i2ages{j} = (year eq 92) * ages{j};
		i4ages{j} = (year eq 94) * ages{j};
	end;
%mend men3;
	
/* women4: emits a complete TITLE statement labelling output as WOMEN.
   Invoke it as a statement on its own, not after a TITLE keyword. */
%macro women4;
	title 'WOMEN';
%mend women4;
/* men4: emits a complete TITLE statement labelling output as MEN.
   Invoke it as a statement on its own, not after a TITLE keyword. */
%macro men4;
	title 'MEN';
%mend men4;
	
* CODE FOR DIFFERENT SAMPLES - FOREIGN STOCK OR FOREIGN BORN;
/* c1: sample filter for cval=1: keeps nativity=5 records in years 91-94. */
%macro c1;
		if nativity = 5;
		if 91 le year le 94;
%mend c1;
/* c2: sample filter for cval=2: keeps nativity 2-5 records in years 91-94
   and creates FB, a 0/1 indicator for nativity=5. */
%macro c2;
		if 2 <= nativity <= 5;
		if 91 <= year <= 94;
		fb = (nativity = 5);
%mend c2;
/* d1: PROC SUMMARY statements for cval=1.  Within each year/code/older
   cell, writes FB2 with PCTFEM = mean of FEMALE and NIMMIG = sum of
   weights; COHORT is carried along as an ID variable. */
%macro d1;
	by year code older;
	id cohort;
	var female;
	output out=fb2
		mean=pctfem
		sumwgt=nimmig;
%mend d1;
/* d2: PROC SUMMARY statements for cval=2.  Within each year/code/older
   cell, writes FB2 with means of FEMALE and FB (PCTFEM, PCTFB) and their
   sums (NWOMEN, NIMMIG); COHORT is carried along as an ID variable.
   NOTE(review): NIMMIG is requested twice, once as the sum of FB and once
   as SUMWGT.  Only one can be written to FB2; confirm in the log which one
   SAS keeps and whether that is the intended immigrant count. */
%macro d2;
	by year code older;
	id cohort;
	var female fb;
	output out=fb2
		mean=pctfem pctfb
		sum=nwomen nimmig
		sumwgt=nimmig;
%mend d2;
	
* START MACRO;

/*
 * makedata(gender, val, cval)
 *
 * Builds the analysis datasets for one gender/sample combination and runs
 * the first-stage GLMs.
 *
 *   gender  women or men.  Selects the gender-specific fragments
 *           (gender.1 through gender.4) and names the output datasets:
 *           <gender>, y<gender> (older=0) and o<gender> (older=1).
 *   val     value of FEMALE kept in the final sample (1 or 0 in the calls
 *           at the bottom of this file).
 *   cval    1 or 2.  Selects the sample filter c<cval> and the PROC SUMMARY
 *           statements d<cval>.
 *
 * Side effects: creates WORK datasets fb1, fb2, nber, two, three, <gender>,
 * y<gender>, o<gender> and <gender>2, and leaves y<gender>/o<gender> sorted
 * by CLUSID for bigmoulton.
 */
%macro makedata(gender,val,cval);
/* Step 1: cell-level sex ratios.  NOTE(review): the inline comment on the
   next line was written for the women run; for gender=men the gender.1
   fragment shifts male ages instead. */
data fb1;                               ** potential MALE fb spouses; 
 set save.data11b;

 %c&cval;
 
 %&gender.1; /*INVOKE  SPECIFIC CODE*/

 /* cohort = 1/2/3 for year 91/92/94, plus 0.1 when older=1 */
 cohort=(1*(year=91))+(2*(year=92))+(3*(year=94));
 cohort=cohort+(older/10);
 
proc sort data=fb1;
 by year code older;

/* weighted cell summaries by year/code/older, written to FB2 */
proc summary data=fb1;
 weight slwt;
 %d&cval.;

/* no DATA= given, so this prints the most recently created dataset */
proc print;
title 'first-stage sex ratios for 1910,1920,1940';

/* Step 2: external immigration-rate data.  YEAR is rescaled with
   (year-1000)/10, e.g. 1910 becomes 91, which matches the 91/92/94 coding
   used above.  Presumably irates3 stores 4-digit years; confirm. */
data nber;
	set save.irates3;
	year=(year-1000)/10;
	ilnimmig= log(itotal);
	
proc sort data=nber;
	by year code;
	
/* NBER is printed twice: once grouped by year/code, once flat */
proc print data=nber;
	by year code;
	
proc print data=nber;
	title 'nber data';
	
/* attach the year/code level NBER variables to each year/code/older cell */
data fb2;
	merge fb2 nber;
	by year code;
	/* iratio2 is not referenced again in this file */
	if year=94 and older=0 then iratio2=1;
		else iratio2=iratio;
		
***********************;
*** micro data step ***;
***********************;

/* Step 3: individual-level sample.  NOTE(review): the inline comment below
   says FEMALE, but for gender=men the gender.2 fragment applies no sex
   filter; the filter on FEMALE happens in the data <gender> step. */
data two; 	** micro FEMALE sample for follow-ups;
set save.data11b;
	
	%&gender.2; /*EXECUTE GENDER SPECIFIC CODE*/
	
	cohort=(1*(year=91))+(2*(year=92))+(3*(year=94));
	cohort=cohort+(older/10);
	
	if cohort ne .;

proc sort data=two;
	by year code older;
	
/* Step 4: merge cell-level ratios onto individuals.  FB2 carries no AGE,
   so the age filter drops cells that have no matching micro record. */
data three;
	merge two fb2;
	by year code older;
	if (age ne .);
	
	/* counts of women and men in the cell, and the men/women ratio */
	women=pctfem*nimmig; lnwom=log(women);
	men=(1-pctfem)*nimmig; lnmen=log(men);
	ratio=men/women;
	
	lnimmig=log(nimmig);
	lnratio=log(ratio);
	
	label ratio='men/women among fb';
	
	** outcomes **;
	
	married= 1 le marst le 2;
	evermar= marst<6;
		if evermar=1 then alone=(2 le marst le 4);
		spsepres= marst=1;
		inlf= labforce=2;
		anykids= nchild>0;
		working= (10 le empstatd le 15);
		/* WORKING is set to missing for year 92 */
		if year=92 then working=.;
		
		** relationship outcomes (m8 only) **;
		
		femhead= relateg=1;
		
	mominhh= (1 le momrule le 3);
	
	** earnings variables ((;
	** coding that was below plus related is now done by rdm9c **;

	/* the sample filters keep only years 91-94, so both flags are 0 here */
	altdrop= ((code=0 or (9 le code le 10))*(95 le year le 96));
	altdrop2= (9 le code le 10)*(95 le year le 96);
	run;
	
	/* Step 5: final gender-specific analysis file */
	data &gender.;
	set three;
	if female=&val; 
	if relateg ne 13;
	
	** scaled earnings variables **;
	
	if marst=1 then spsinc=spscore; else spsinc=0;
	twoscor= occscore+spsinc;
	
	if spsinc>0 then lnspsinc= log(spsinc);
	if twoscor>0 then lntwo= log(twoscor);
	
	/* NOTE(review): escale3 counts sprule 1-5 but escale3b counts sprule
	   2-5; confirm the difference is intended. */
	escale3= (1+nchild+(1 le sprule le 5))**.6;
	escale3b= (1+(.5*nchild)+(.9*(2 le sprule le 5)));
	
	famscr1= famscore/myfamsiz;
	famscr2= famscore/famsize;
	
	if famscore>0 then do;
		lnfscore= log(famscore);
		lnfscr1= log(famscr1);
		lnfscr2= log(famscr2);
	end;
	
	famscr3= twoscor/escale3;
	famscr3b= twoscor/escale3b;
	
	if twoscor>0 then lnfscr3= log(famscr3);
	
	label famscr1= 'family income per adult size'
		famscr2= 'family income per family member'
		famscr3= 'parents inc. per nuke fam. member';
		
	kid5wt= slwt*nchlt5;
	kidallwt= slwt*nchild;
	
	*** code dummies ***;
	
	/* NOTE(review): the age19-age48 range matches the women dummies.  In
	   the men run it also creates age19 and age20 (never assigned), while
	   age49 and age50 get the default length. */
	length code1-code10 age19-age48 native2 native3 year2 year4 3;
	
	year2= year=92;
	year4= year=94;
	
	native2= nativity=2;
	native3= nativity=3;
	
	code1= code=1; code2= code=2;
	code3= code=3; code4= code=4;
	code5= code=5; code6= code=6;
	code7= code=7; code8= code=8;
	code9= code=9; code10= code=10;
	
%&gender.3; /*INVOKE GENDER SPECIFIC CODE*/
			
	/* cluster id for the Moulton step: one value per year/code cell */
	clusid= year+(code/100);
	
	if year ne 94 then do;
		sps2nd= 2 le spsnativ le 4;
		spsnat= spsnativ=1;
		spsfb= spsnativ=5;
	end;
	
	/* Only the regression variables survive; every outcome and earnings
	   variable computed above is dropped here.  The 19-50 ranges span both
	   the women (19-48) and men (21-50) dummy sets. */
	keep ratio lnimmig iratio ilnimmig slwt clusid year age code nativity cohort
		older native2 native3 year2 year4 code1-code10 age19-age50 
		i2age19-i2age50 i4age19-i4age50;
	run;
	
/* split into young (older=0) and old (older=1) subsamples */
data y&gender.;
	set &gender.;
	if older=0;
	
data o&gender.;
	set &gender.;
	if older=1;
run;
	
*********************************************;
*** ols, first-stage, 2sls	*********;
*********************************************;

proc glm data=y&gender;
	%&gender.4;
	title2 'FIRST STAGE: slwt-weighted, YOUNG';
	weight slwt;
	class year age code nativity cohort;
	model ratio lnimmig =
		nativity iratio ilnimmig year*age code/ solution;
		output out=&gender.2 p=pratio pimmig;
	run;
	
/* NOTE(review): this writes to the same out= dataset as the YOUNG model
   above, so the young predictions are overwritten.  Nothing in this file
   reads the dataset afterwards. */
proc glm data=o&gender.;
	%&gender.4;
	title2 'FIRST STAGE: slwt-weighted, OLD';
	weight slwt;
	class year age code nativity cohort older;
	model ratio lnimmig =
		nativity iratio ilnimmig year*age code/ solution;
		output out=&gender.2 p=pratio pimmig;
	run;

***************************************;
*** Moulton Corrections ***;
***************************************;
/* bigmoulton runs PROC SUMMARY with BY clusid, so sort both subsamples */
proc sort data=y&gender.;
	by clusid;
proc sort data=o&gender.;
	by clusid;
%mend makedata;	

/*
 * bigmoulton(gender, subset, reg)
 *
 * Computes Moulton correction factors for the first-stage regressions of
 * RATIO and LNIMMIG on the instruments IRATIO and ILNIMMIG.
 *
 *   gender  women or men; only used to pick the title macro (gender.4).
 *   subset  input dataset (e.g. ywomen, owomen); must be sorted by CLUSID,
 *           which makedata does at its end.
 *   reg     name of the macro holding the control-variable list
 *           (regsw, regsow, regsm or regsom).
 *
 * For each dependent variable the nested minimoulton macro:
 *   1. regresses the dependent variable on both instruments plus controls
 *      and passes the weighted residuals to moulton3 with step=1;
 *   2. regresses IRATIO on ILNIMMIG plus controls, passes those residuals
 *      to moulton3 with step=2, and prints the factor GFAC;
 *   3. repeats step 2 for ILNIMMIG on IRATIO plus controls.
 *
 * NOTE(review): ds1, ds2, rho1, rho2 and DUMMY are not created in this
 * file; presumably moulton3.sas builds ds<step> holding rho<step> from
 * test2, step, k and randvar.  Confirm against moulton3.sas.
 *
 * NOTE(review): k is hard-coded at 62 and 61.  That equals the parameter
 * count (with intercept) for a control list with 15 age dummies, as in
 * regsm; the old-subsample lists have 14.  Confirm how moulton3 uses k.
 */
%macro bigmoulton(gender,subset,reg);
	%macro minimoulton(dvar);

%let depvar=&dvar;

proc reg data=&subset.;
	/* gender.4 already expands to a complete TITLE statement, so it is
	   invoked as a statement of its own, the same way makedata does it.
	   Prefixing it with the TITLE keyword generated a doubled title. */
	%&gender.4;
	title2 'FIRST STAGE: slwt-weighted -- Compare this to GLM';
	weight slwt;
	model &depvar= iratio ilnimmig %&reg.;
	output out=test2 r=v_ij;
	run;

/* scale residuals by the square root of the weight */
data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=1;
%let k=62;
%let randvar= clusid;
%include moulton3;

proc datasets; delete test2; run;

/* auxiliary regression for the first instrument: IRATIO on everything else */
proc reg data=&subset.;
	weight slwt;
		model iratio= ilnimmig %&reg.;
		output out=test2 r=v_ij;
		run;

data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=2;
%let k=61;
%let randvar= clusid;
%include moulton3;
		
proc datasets;
	delete test2;

/* cluster sizes: one row per CLUSID, then their mean (m) and variance (v) */
proc summary data=&subset.;
	by clusid;
	var &depvar; output out=sizeds n=size;
	
proc summary data=sizeds;
	var size;
	output out=sizeds mean=m var=v;
	
/* correction factor from the two intraclass correlations and the
   cluster-size moments */
data calc;
	merge ds1 ds2 sizeds;
		drop dummy _type_;
	gfac=sqrt(1+(((v/m)+(m-1))*rho2*rho1));
	
proc print; title 'moulton correction factors';

/* auxiliary regression for the second instrument: ILNIMMIG on everything
   else; this reuses step=2, so rho2 is replaced before GFAC is recomputed */
proc reg data=&subset.;
		weight slwt;
			model ilnimmig= iratio %&reg.;
			output out=test2 r=v_ij;
			
data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=2;
%let k=61;
%let randvar= clusid;
%include moulton3;
		
proc datasets;
delete test2;

proc summary data=&subset.;
	by clusid;
	var &depvar;
	output out=sizeds n=size;
	
proc summary data=sizeds;
	var size;
		output out=sizeds mean=m var=v;
		
data calc;
	merge ds1 ds2 sizeds;
		drop dummy _type_;
	gfac=sqrt(1+(((v/m)+(m-1))*rho2*rho1));

proc print;
	title 'moulton correction factors';
	run;
	%mend minimoulton;
	
	/* one pass per first-stage dependent variable */
	%minimoulton(ratio); %minimoulton(lnimmig);

%mend bigmoulton;

* DIFFERENT REGRESSION LISTS!!!;
	* USE w WITH WOMEN, m WITH MEN. AN o IN THE NAME (regsow, regsom) MARKS THE LIST FOR THE OLD SUBSAMPLE;
/* regsw: control-variable list for the YOUNG women subsample (ywomen).
   That subsample holds ages 18-33 only (the women sample is 18-48 and
   OLDER flags age 34 or more), so the age dummies stop at 33 and age 18 is
   the omitted category.  Listing age34-age48 here added columns that are
   identically zero in ywomen and made the model rank deficient.  The list
   now has the same number of terms as regsm, the young men list.
   The trailing semicolon is part of the macro text and ends the MODEL
   statement that this list is expanded into. */
%macro regsw; native2 native3 year2 year4 code1-code10 age19-age33 
	i2age19-i2age33 i4age19-i4age33;
	%mend;	
/* regsow: control-variable list used with the OLD women subsample
   (owomen): nativity and year dummies, code dummies, and age dummies 35-48
   with their year-92 and year-94 interactions.  The trailing semicolon is
   part of the macro text. */
%macro regsow;
	native2 native3
	year2 year4
	code1-code10
	age35-age48
	i2age35-i2age48
	i4age35-i4age48;
%mend regsow;
/* regsm: control-variable list used with the YOUNG men subsample (ymen):
   nativity and year dummies, code dummies, and age dummies 21-35 with
   their year-92 and year-94 interactions.  The trailing semicolon is part
   of the macro text. */
%macro regsm;
	native2 native3
	year2 year4
	code1-code10
	age21-age35
	i2age21-i2age35
	i4age21-i4age35;
%mend regsm;
/* regsom: control-variable list used with the OLD men subsample (omen):
   nativity and year dummies, code dummies, and age dummies 37-50 with
   their year-92 and year-94 interactions.  The trailing semicolon is part
   of the macro text. */
%macro regsom;
	native2 native3
	year2 year4
	code1-code10
	age37-age50
	i2age37-i2age50
	i4age37-i4age50;
%mend regsom;

/* Driver: four runs, each building the datasets with makedata and then
   computing Moulton factors for the young (y*) and old (o*) subsamples.
   The third makedata argument picks the sample used for the cell-level
   sex ratios: 1 = nativity 5 only (c1/d1), 2 = nativity 2-5 (c2/d2).
   Each makedata call overwrites the WORK datasets of the previous run. */

/* women, ratios from the nativity=5 sample */
%makedata(women,1,1);
%bigmoulton(women,ywomen,regsw);
%bigmoulton(women,owomen,regsow);

/* women, ratios from the nativity 2-5 sample */
%makedata(women,1,2);
%bigmoulton(women,ywomen,regsw);
%bigmoulton(women,owomen,regsow);

/* men, ratios from the nativity=5 sample */
%makedata(men,0,1);	
%bigmoulton(men,ymen,regsm);
%bigmoulton(men,omen,regsom);

/* men, ratios from the nativity 2-5 sample */
%makedata(men,0,2);	
%bigmoulton(men,ymen,regsm);
%bigmoulton(men,omen,regsom);
